
import requests
import xlwt


from bs4 import BeautifulSoup


# Fetch a page and return its decoded text
def getHtmlText(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        The decoded page text, or an empty string when the request fails
        (connection error, timeout, or a non-2xx status code).
    """
    # Send a desktop-browser User-Agent with the request.
    h = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36"}
    try:
        r = requests.get(url, headers=h, timeout=timeout)
        r.raise_for_status()
    except requests.RequestException as exc:
        # Best-effort fetch: report the failure and let the caller carry on
        # with an empty page instead of aborting the whole crawl.
        print("获取页面失败:", url, exc)
        return ""
    # Use the encoding detected from the body rather than the one guessed
    # from the HTTP headers.
    r.encoding = r.apparent_encoding
    return r.text

# Parse an article page and decide whether it is a match
def parsePage(page):
    """Report whether an article page mentions "html" and has a body paragraph.

    The page is parsed with BeautifulSoup. When the text of ``<body>``
    contains the (lower-case) substring "html", the first ``<p>`` element
    carrying the expected inline style is printed and the page counts as a
    match.

    Args:
        page: HTML source of the article page.

    Returns:
        True when the page matches; False when it does not or when it cannot
        be parsed (e.g. the document has no ``<body>`` or no paragraph with
        the expected style).
    """
    try:
        soup = BeautifulSoup(page, "html.parser")

        if "html" not in soup.body.text:
            return False

        paragraphs = soup.find_all("p", {'style':'margin-left:0pt; margin-right:0pt; text-align:left'})
        # Indexing raises IndexError when no paragraph matched; that case is
        # reported below and treated as "no match".
        print(paragraphs[0])
        return True
    except Exception:
        # Missing <body>, missing paragraph, or markup the parser rejects.
        print("解析页面数据时出错")
        return False
# Save the collected rows as an Excel workbook
def saveAsExcel(name,list):
    """Write the first three columns of every row to ``<name>.xls``.

    Args:
        name: Used both as the worksheet name and as the output file name
            (".xls" is appended).
        list: Sequence of rows; each row must have at least three items.
            The parameter name shadows the builtin but is kept so existing
            keyword callers keep working.

    Raises:
        IndexError: If a row has fewer than three items.
    """
    wb = xlwt.Workbook(encoding='utf-8')
    sheet = wb.add_sheet(name,cell_overwrite_ok=True)

    # Each cell is written exactly once; the former conditional branches only
    # rewrote the same value into the same cell.
    for row_idx, row in enumerate(list):
        for col_idx in range(3):
            sheet.write(row_idx, col_idx, row[col_idx])
    wb.save(name+".xls")
# Entry point: crawl the listing pages and export the matching articles
def main():
    """Crawl the listing pages, filter the articles and save them to Excel.

    For every listing page the anchors with ``href="###"`` are collected,
    the linked article is fetched, and articles accepted by ``parsePage``
    are appended to the result table, which is finally written with
    ``saveAsExcel``.
    """
    start_url = "http://222.180.188.236/cms/view/lm.action?id=1002&pager.offset=0&pageNo="
    target_url = "http://222.180.188.236/cms/view/wz.action?id="
    page = 3
    article_list = [["企业名称","链接","时间"]]
    # Page number 0 is never requested, so iteration starts at 1.
    for page_no in range(1, page):
        listing_html = getHtmlText(start_url + str(page_no))
        soup = BeautifulSoup(listing_html, "html.parser")
        print("解析链接")
        for article in soup.find_all("a",{'href':'###'}):
            print(article.string)
            onclick = article.get('onclick')
            if onclick is None:
                # An anchor without an onclick handler carries no article id;
                # skip it instead of aborting the whole crawl.
                continue
            # Characters 8..39 of the onclick handler are used as the article
            # id -- presumably a 32-character id; verify against the site.
            article_url = target_url + str(onclick)[8:40]
            if parsePage(getHtmlText(article_url)):
                name = article.text.replace(' ', '')
                # The third column (time) is filled with the placeholder 0.
                article_list.append([name, article_url, 0])
    saveAsExcel("重庆科技学院春招前端",article_list)
# Run the crawl only when executed as a script, so importing this module
# does not trigger network requests or write the output file.
if __name__ == "__main__":
    main()
